# Environment for a TinyZero training run. Every setting below is exported so
# that the launcher script invoked at the end of this file inherits it.

# --- Locations ----------------------------------------------------------------
# NOTE(review): "PORJECT" looks like a misspelling of "PROJECT". The name is
# kept because it is exported and may be read under this exact spelling by the
# launched script - confirm before renaming.
PORJECT_PATH="/llm/nankai/xuyang_space/project/r1_infra/tinyzero"
# Directory name suggests this is meant for Ray's temporary files - TODO confirm.
TMPDIR="/home/xuyang/ray_tmp"
export PORJECT_PATH TMPDIR

# --- Backend / tooling --------------------------------------------------------
VLLM_ATTENTION_BACKEND="XFORMERS"
export VLLM_ATTENTION_BACKEND
# export WANDB_MODE=offline

# --- GPU allocation -----------------------------------------------------------
ROLLOUT_TP_SIZE=1
CUDA_VISIBLE_DEVICES=3 # expose only GPU index 3 to CUDA
N_GPUS=1
export ROLLOUT_TP_SIZE CUDA_VISIBLE_DEVICES N_GPUS

# --- Base model ---------------------------------------------------------------
MODEL_ROOT="/llm/nankai/xuyang_space/LLMs"
MODEL_NAME="Qwen2.5-3B"
BASE_MODEL="${MODEL_ROOT}/${MODEL_NAME}"
export MODEL_ROOT MODEL_NAME BASE_MODEL

# --- Dataset ------------------------------------------------------------------
# Disabled alternative: compose DATA_DIR from a data root and a dataset name.
# export DATA_ROOT=/llm/nankai/xuyang_space/data
# export DATA_NAME=gsm8k
# export DATA_DIR=$DATA_ROOT/$DATA_NAME
DATA_DIR="/llm/nankai/xuyang_space/project/TinyZero/dataset"
export DATA_DIR

# --- Project / logging --------------------------------------------------------
PROJECT_NAME="TinyZero"
LOG_PATH="${PORJECT_PATH}" # logs go to the project root
export PROJECT_NAME LOG_PATH

# --- Training knobs -----------------------------------------------------------
LEARNING_RATE="1e-6"
ROLLOUT_N=5
BATCH_SIZE=128
DATA_SHUFFLE="False"
MAX_RESPONSE_LENGTH=1024
# Lower-case on purpose: the launcher file name at the end of this script is
# built from this value.
algorithm="grpo"
export LEARNING_RATE ROLLOUT_N BATCH_SIZE DATA_SHUFFLE MAX_RESPONSE_LENGTH algorithm

# --- Run identity -------------------------------------------------------------
# Presumably a date tag for the run (the name says DATA, not DATE) - TODO confirm.
TODAY_DATA="0705"
# The experiment name encodes the model, algorithm and main knobs, so the
# output directory of each configuration is distinct.
EXPERIMENT_NAME="${MODEL_NAME}-${algorithm}-${MAX_RESPONSE_LENGTH}-${LEARNING_RATE}-${BATCH_SIZE}-${ROLLOUT_N}-${DATA_SHUFFLE}-${TODAY_DATA}"
OUTPUT_DIR="${PORJECT_PATH}/outputs/${EXPERIMENT_NAME}"
CUSTOM_REWARD_FUNCTION="${PORJECT_PATH}/reward_score/countdown.py"
export TODAY_DATA EXPERIMENT_NAME OUTPUT_DIR CUSTOM_REWARD_FUNCTION


#######################################
# Start the training run for the selected algorithm.
#
# The launcher path is derived from the project root that this script exports
# near its top as PORJECT_PATH (sic), so the root is spelled out in one place
# only. The path is quoted so whitespace or glob characters in either variable
# cannot split or expand the argument.
# Globals:   PORJECT_PATH (read), algorithm (read)
# Arguments: none
# Returns:   exit status of the launcher script
#######################################
launch_training() {
  bash "${PORJECT_PATH}/scripts/run_qwen2-3b_${algorithm}.sh"
}

launch_training